library(readr)
library(ggplot2)
library(dplyr)
library(methods)
library(stringi)
library(keras)

Set-up

Read in the following libraries:

library(readr)
library(dplyr)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-16
library(keras)

Today we are going to look at image classification from 10 classes of images. Get the imagenette-320.zip here:

Once this is downloaded, you’ll have to run something like in the notes to construct the training data and build a prediction model. Try a few things with the dataset before moving on… Can you use a different transfer model or grab a different internal layer? How does that influence the predictions?

For this lab, just upload the Rmd file rather than your predictions

Run transfer learning

Here, I grabbed the ResNet50 model and the penultimate layer.

# Load ResNet50 with ImageNet weights and expose its penultimate
# ("avg_pool") layer as the output, yielding a 2048-dim embedding per image.
base_model <- application_resnet50(weights = 'imagenet', include_top = TRUE)
model_avg_pool <- keras_model(
  inputs = base_model$input,
  outputs = get_layer(base_model, 'avg_pool')$output
)

Next, read in the dataset. This should work with a different input provided you structure the dataset the same way.

# Locate every jpg/jpeg/png under the input directory and build the
# (n, 224, 224, 3) image tensor Z and the 0-based class label vector y.
# Class membership is given by each image's containing folder.
input_dir <- "../notes/image_data/imagenette-320/"

img_files <- dir(input_dir, recursive = TRUE)
file_ext <- stri_match(img_files, regex = "\\.([A-Za-z]+$)")[, 2]
img_files <- img_files[stri_trans_tolower(file_ext) %in% c("jpg", "png", "jpeg")]
class_labels <- dirname(img_files)            # folder name = class label
class_names <- levels(factor(class_labels))

n_imgs <- length(class_labels)
Z <- array(0, dim = c(n_imgs, 224, 224, 3))
y <- as.numeric(factor(class_labels)) - 1L    # 0-based integer class codes
for (idx in seq_len(n_imgs)) {
  img_path <- file.path(input_dir, img_files[idx])
  img <- image_to_array(image_load(img_path, target_size = c(224, 224)))
  Z[idx, , , ] <- array_reshape(img, c(1, dim(img)))
}

# Shuffle images and labels together; fixed seed keeps the run reproducible.
set.seed(1)
shuffle <- sample(seq_len(nrow(Z)))
Z <- Z[shuffle, , , ]
y <- y[shuffle]

Now, produce the embeddings:

# Run the ResNet-preprocessed images through the truncated model to get
# one 2048-dimensional embedding row per image.
Z_prep <- imagenet_preprocess_input(Z)
X <- predict(model_avg_pool, x = Z_prep, verbose = TRUE)
dim(X)
## [1]  500 2048

With the new embedding matrix, let’s construct a training dataset. Here I am using a 60/40 split, but you can always modify this.

# Randomly assign each observation to train (60%) or validation (40%).
train_id <- sample(c("train", "valid"), nrow(X), TRUE, prob = c(0.6, 0.4))

X_train <- X[train_id == "train", ]                 # Note: X is a matrix
# Fix the one-hot width explicitly: without num_classes, to_categorical()
# infers the number of columns from the data, which would silently shrink
# y_train if some class happened to be absent from the training split.
y_train <- to_categorical(y[train_id == "train"],
                          num_classes = length(class_names))

With this dataset, we can fit any model that we want, though it's easy enough to just use a neural network:

# A small feed-forward classifier on top of the embeddings: two hidden
# layers (256 units, ReLU, 50% dropout) into a softmax with one unit
# per class.
model <- keras_model_sequential() %>%
  layer_dense(units = 256, input_shape = ncol(X_train)) %>%
  layer_activation(activation = "relu") %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 256) %>%
  layer_activation(activation = "relu") %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = ncol(y_train)) %>%
  layer_activation(activation = "softmax")

# RMSprop at half the default step size; track accuracy during training.
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = optimizer_rmsprop(lr = 0.001 / 2),
  metrics = c('accuracy')
)

# Fit for 8 epochs and display the loss/accuracy curves.
history <- model %>% fit(X_train, y_train, epochs = 8)
plot(history)

How well does this model make predictions? It almost perfectly fits the training set and gets over 95% correct on the validation set — this with a fairly complex task and only a few hundred training examples.

# predict_classes() was removed from keras (TensorFlow >= 2.6). Derive the
# 0-based predicted class as the row-wise argmax of the softmax output;
# ties.method = "first" matches argmax's deterministic first-maximum rule.
y_pred <- max.col(predict(model, X), ties.method = "first") - 1L
# Classification accuracy separately for the train and validation splits.
tapply(y == y_pred, train_id, mean)
##     train     valid 
## 1.0000000 0.9563107

Here is the confusion matrix:

# Three-way confusion table: true class x predicted class, split by train/valid.
table(value = class_names[y + 1L], prediction = class_names[y_pred + 1L], train_id)
## , , train_id = train
## 
##                   prediction
## value              cassette_player chain_saw church English_springer
##   cassette_player               36         0      0                0
##   chain_saw                      0        29      0                0
##   church                         0         0     36                0
##   English_springer               0         0      0               27
##   French_horn                    0         0      0                0
##   garbage_truck                  0         0      0                0
##   gas_pump                       0         0      0                0
##   golf_ball                      0         0      0                0
##   parachute                      0         0      0                0
##   tench                          0         0      0                0
##                   prediction
## value              French_horn garbage_truck gas_pump golf_ball parachute
##   cassette_player            0             0        0         0         0
##   chain_saw                  0             0        0         0         0
##   church                     0             0        0         0         0
##   English_springer           0             0        0         0         0
##   French_horn               35             0        0         0         0
##   garbage_truck              0            28        0         0         0
##   gas_pump                   0             0       25         0         0
##   golf_ball                  0             0        0        24         0
##   parachute                  0             0        0         0        27
##   tench                      0             0        0         0         0
##                   prediction
## value              tench
##   cassette_player      0
##   chain_saw            0
##   church               0
##   English_springer     0
##   French_horn          0
##   garbage_truck        0
##   gas_pump             0
##   golf_ball            0
##   parachute            0
##   tench               27
## 
## , , train_id = valid
## 
##                   prediction
## value              cassette_player chain_saw church English_springer
##   cassette_player               13         1      0                0
##   chain_saw                      0        19      0                0
##   church                         0         0     14                0
##   English_springer               0         0      0               23
##   French_horn                    0         0      0                0
##   garbage_truck                  0         0      0                0
##   gas_pump                       1         0      2                0
##   golf_ball                      1         0      0                0
##   parachute                      0         0      1                0
##   tench                          0         0      0                0
##                   prediction
## value              French_horn garbage_truck gas_pump golf_ball parachute
##   cassette_player            0             0        0         0         0
##   chain_saw                  2             0        0         0         0
##   church                     0             0        0         0         0
##   English_springer           0             0        0         0         0
##   French_horn               15             0        0         0         0
##   garbage_truck              0            22        0         0         0
##   gas_pump                   1             0       21         0         0
##   golf_ball                  0             0        0        25         0
##   parachute                  0             0        0         0        22
##   tench                      0             0        0         0         0
##                   prediction
## value              tench
##   cassette_player      0
##   chain_saw            0
##   church               0
##   English_springer     0
##   French_horn          0
##   garbage_truck        0
##   gas_pump             0
##   golf_ball            0
##   parachute            0
##   tench               23

We can also look at some negative examples, but there really are not many here:

# Display each misclassified image, annotated with its (wrong) predicted
# label in red. Pixel values are in [0, 255], so divide by 255 for rasterImage.
par(mfrow = c(2, 3))
miss <- which(y_pred != y)
for (j in miss) {
  par(mar = rep(0, 4L))
  plot(0, 0, xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, type = "n")
  rasterImage(Z[j, , , ] / 255, 0, 0, 1, 1)
  text(0.5, 0.1, label = class_names[y_pred[j] + 1L], col = "red", cex = 2)
}

Finally, we can also find those examples that have the highest probability of being in a class. First, get all of the probabilities:

# Full matrix of predicted class probabilities (one row per image,
# one column per class) from the softmax output.
y_probs <- predict(model, X)

Then, this code finds, for each class, the example with the highest predicted probability of belonging to that class (a bit modified from the notes):

# For each class, find the image assigned the highest probability of
# belonging to that class, and plot it with its true label.
top_ids <- apply(y_probs, 2, which.max)

par(mfrow = c(3, 4))
for (i in top_ids) try({
  par(mar = rep(0, 4L))
  plot(0, 0, xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, type = "n", asp = 1)
  # Bug fix: pixel values span [0, 255], so divide by 255 (not 256) so that
  # pure white maps exactly to 1.0 — consistent with the earlier plot code.
  rasterImage(Z[i, , , ] / 255, 0, 0, 1, 1)
  text(0.5, 0.1, label = class_names[y[i] + 1L], col = "red", cex = 2)
})

A bit of visualization

Let’s try to visualize the embedding itself using principal components again:

# Project the embedding matrix onto its first two principal components
# and attach the class label of each image for coloring.
pc_scores <- prcomp(X)$x[, 1:2]
pca <- as_tibble(pc_scores)
pca$y <- class_names[y + 1L]

And then plot it:

# Scatter plot of the first two principal components, colored by class.
pca %>%
  ggplot(aes(x = PC1, y = PC2, color = y)) +
  geom_point(size = 4) +
  labs(x = "", y = "", color = "class") +
  theme_minimal()